/*  
Creates table of summary statistics (table 1 in paper). 

Requires the clean datasets:
survey_level_data.dta
building_level_data.dta
building_characteristics.dta

Arianna Salazar Miranda
*/

**************************************************************************************************
*Replace with path to data
**************************************************************************************************

clear all

* Root folder of the replication package; edit this path before running.
global project "/Users/arianna/Dropbox (MIT)/emporis_project/paper/submission_PLOS/data_repository"

**************************************************************************************************
*load survey responses and average the score within each building
**************************************************************************************************

use "${project}/Data/survey_level_data.dta", clear

**************************************************************************************************
*Cleaning
**************************************************************************************************

* Exclude three buildings, identified by their official name.
drop if inlist(officialname, "Mark Twain Building", "Empire State Building", "Byrd's Lofts")

* One observation per building id (buildingnumberebn) holding the mean score.
collapse (mean) score, by(buildingnumberebn)

* Flag that marks buildings present in the survey data.
generate tag_survey = 1

* Park the building-level means in a temporary file for the merge further down.
tempfile m
save `m', replace

**************************************************************************************************
*Merge building characteristics
**************************************************************************************************

* Start from the building-level file and attach the survey means saved in the tempfile.
* assert(master match): every observation must be master-only or matched; an
* observation found only in the using file stops the script with an error.
use "${project}/Data/building_level_data.dta", clear
merge 1:1 buildingnumberebn using `m', assert(master match) nogenerate

* Attach building characteristics under the same merge rule.
merge 1:1 buildingnumberebn using "${project}/Data/building_characteristics.dta", assert(master match) nogenerate

**************************************************************************************************
*generate bottom/top group variable (zero vs. positive 2011 panoramio count)
**************************************************************************************************

* Detailed distribution of the 2014 panoramio count (printed for reference only).
* NOTE(review): the split below is based on the 2011 count, not the 2014 one
* summarized here -- confirm this is intended.
sum r50_count_picData_2014, d

* Two-group indicator built from the 2011 panoramio count:
*   1 = bottom (count equal to zero), 2 = top (count greater than zero).
* Observations with a missing 2011 count keep percentile == . and are
* excluded from both groups.
gen percentile = .

*bottom
replace percentile = 1 if r50_count_picData_2011 == 0

*top
* Stata treats missing values as larger than any number, so "x > 0" alone is
* true for missing x. Exclude missing counts explicitly so they are not
* classified as "top".
replace percentile = 2 if r50_count_picData_2011 > 0 & !missing(r50_count_picData_2011)

* Group sizes: all buildings, then buildings in the survey sample.
count if percentile == 1
count if percentile == 1 & tag_survey == 1

count if percentile == 2
count if percentile == 2 & tag_survey == 1


**************************************************************************************************
*TABLE 1
**************************************************************************************************

* Table 1 rows for the three photo-count variables, in this order:
* panoramio 2014, panoramio 2011, flickr.
* For each variable the summaries are printed in this order:
*   all buildings, survey sample,
*   bottom group (all buildings, survey sample),
*   top group (all buildings, survey sample).
foreach photo_count in r50_count_picData_2014 r50_count_picData_2011 r50_count_picData_flickr {

    * all building sample
    summarize `photo_count'

    * all survey sample
    summarize `photo_count' if tag_survey == 1

    * grp 1 = bottom, grp 2 = top
    forvalues grp = 1/2 {

        * building sample within the group
        summarize `photo_count' if percentile == `grp'

        * survey sample within the group
        summarize `photo_count' if tag_survey == 1 & percentile == `grp'
    }
}

**************************************************************************************************

* Table 1 row for the building-level mean survey score.
local outcome score

* all building sample
summarize `outcome'

* bottom (grp 1) and top (grp 2) building samples
forvalues grp = 1/2 {
    summarize `outcome' if percentile == `grp'
}

**************************************************************************************************

* Table 1 rows for building height and building year.
*
* FOR ALL BUILDINGS IN SAMPLE VARIABLE IS NOT AVAILABLE FOR PUBLIC USE:
* the full-building-sample summaries are therefore left disabled (shown as
* comments) and only the survey-sample summaries are run.
foreach attr in building_height building_year {

    * all building sample (disabled, see note above)
    * summarize `attr'

    * all survey sample
    summarize `attr' if tag_survey == 1

    * grp 1 = bottom, grp 2 = top
    forvalues grp = 1/2 {

        * building sample within the group (disabled, see note above)
        * summarize `attr' if percentile == `grp'

        * survey sample within the group
        summarize `attr' if tag_survey == 1 & percentile == `grp'
    }
}

